In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from collections import Counter
import sys
# Extra directories to make importable (the later `initialize` / `utils`
# imports are presumably resolved from here — confirm against the repo layout).
package_paths = ['visualisations']
for pth in package_paths:
    # Guard against duplicates so re-running this cell in a live kernel
    # does not keep growing sys.path with the same entry.
    if pth not in sys.path:
        sys.path.append(pth)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
In [2]:
from initialize import Init
In [3]:
from utils import accuracy_lines, large_vi, mine, common_neurons_percentage_multiple, plot_distr, \
                      accuracy_dif, accuracy_dif_control, accuracy_dif2
In [4]:
from transformers import AutoTokenizer
from transformers import pipeline
2024-01-17 19:44:03.630302: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
In [5]:
# Build one Init object per "good" results directory.
# NOTE(review): 'ru' is presumably a language code — confirm against initialize.Init.
path_1, path_2, path_3 = 'good_token100/', 'good_token200/', 'good_token300/'
good_1, good_2, good_3 = [Init(run_dir, 'ru') for run_dir in (path_1, path_2, path_3)]
In [6]:
# Build one Init object per "broken" results directory.
# Note: this rebinds path_1..path_3, which previously pointed at the good_* directories.
path_1, path_2, path_3 = 'broken_token100/', 'broken_token200/', 'broken_token300/'
broken_1, broken_2, broken_3 = [Init(run_dir, 'ru') for run_dir in (path_1, path_2, path_3)]
In [7]:
# Pass the 'ADJ_Gender' entry of every run's scores_layers (good runs first,
# then broken runs), followed by the category name itself.
accuracy_lines(
    *(run.scores_layers['ADJ_Gender']
      for run in (good_1, good_2, good_3, broken_1, broken_2, broken_3)),
    'ADJ_Gender',
)

Visualizations¶

From neurons to the layers they are in¶

Visualization of the distribution of the number of neurons (ranked in the top 20% by weight according to the overall ranking) by layers for two models¶

In [8]:
# mine() is applied to the top_neurons of the three runs in each group
# (tagged 'good' / 'broken'); both results go to large_vi() together with
# the literal arguments 3 and '1000000'.
large_vi(
    mine(*(run.top_neurons for run in (good_1, good_2, good_3)), 'good'),
    mine(*(run.top_neurons for run in (broken_1, broken_2, broken_3)), 'broken'),
    3,
    '1000000',
)
No description has been provided for this image
In [9]:
# Overlap of ordered neurons between broken_1 and broken_2, shown as an annotated heatmap.
c = common_neurons_percentage_multiple(
    broken_1.ordered_neurons,
    broken_2.ordered_neurons,
)
fig = px.imshow(
    c,
    text_auto=True,
    labels={"x": "Categories", "y": "Top N% of neurons"},
    template="seaborn",
    title="Percentage of top-N% neurons overlap (comparison between BERTs after 1kk steps)",
)
fig.show()
In [10]:
# Overlap of ordered neurons between good_1 and broken_3, shown as an annotated heatmap.
# The title names the compared pair: the previous cell plots a different pair
# (broken_1 vs broken_2), and with an identical title the two figures could
# not be told apart once rendered or exported.
c = common_neurons_percentage_multiple(
    good_1.ordered_neurons,
    broken_3.ordered_neurons,
)
fig = px.imshow(
    c,
    text_auto=True,
    labels=dict(x="Categories", y="Top N% of neurons"),
    template="seaborn",
    title="Percentage of top-N% neurons overlap (good_1 vs broken_3, BERTs after 1kk steps)",
)
fig.show()
In [11]:
# top_neurons of the three "good" runs, in run order.
plot_distr(*(run.top_neurons for run in (good_1, good_2, good_3)))
No description has been provided for this image
In [12]:
# top_neurons of the three "broken" runs, in run order.
plot_distr(*(run.top_neurons for run in (broken_1, broken_2, broken_3)))
No description has been provided for this image

F1 comparison: good vs broken¶

In [13]:
# scores of the three good runs followed by the three broken runs.
accuracy_dif(
    *(run.scores for run in (good_1, good_2, good_3)),
    *(run.scores for run in (broken_1, broken_2, broken_3)),
)

GOOD model (actual model test scores vs control task test scores)¶

In [14]:
# Good runs: the three `scores` first, then the three `scores_c`
# (the control-task scores, per the section heading).
accuracy_dif_control(
    *(run.scores for run in (good_1, good_2, good_3)),
    *(run.scores_c for run in (good_1, good_2, good_3)),
)

BROKEN model (actual model test scores vs control task test scores)¶

In [15]:
# Broken runs: the three `scores` first, then the three `scores_c`
# (the control-task scores, per the section heading).
accuracy_dif_control(
    *(run.scores for run in (broken_1, broken_2, broken_3)),
    *(run.scores_c for run in (broken_1, broken_2, broken_3)),
)

Subsets of neurons¶

In [16]:
# Good runs: full scores, then scores_keep_top, then scores_keep_bot —
# three values per group, in run order.
accuracy_dif2(
    *(run.scores for run in (good_1, good_2, good_3)),
    *(run.scores_keep_top for run in (good_1, good_2, good_3)),
    *(run.scores_keep_bot for run in (good_1, good_2, good_3)),
)
In [17]:
# Broken runs: full scores, then scores_keep_top, then scores_keep_bot —
# three values per group, in run order.
accuracy_dif2(
    *(run.scores for run in (broken_1, broken_2, broken_3)),
    *(run.scores_keep_top for run in (broken_1, broken_2, broken_3)),
    *(run.scores_keep_bot for run in (broken_1, broken_2, broken_3)),
)